package com.zc.bigdata.mapreduce;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.File;
import java.io.IOException;

/**
 * Driver that configures and submits the "access" MapReduce job.
 *
 * <p>The job is wired with {@code AccessMapper} as the mapper, {@code AccessReducer} as both
 * the combiner and the reducer, and {@code AccessPartitioner} as the partitioner. Both the map
 * output and the final output use {@link Text} keys and {@code Access} values.
 */
public class AccessApp {

    /** System property that tells Hadoop where its installation lives. */
    private static final String HADOOP_HOME_PROPERTY = "hadoop.home.dir";

    /** Fallback Hadoop installation directory used when the property above is not set. */
    private static final String DEFAULT_HADOOP_HOME = "D://gitee//hadoop-2.6.0";

    /** Directory the job reads its input from. */
    private static final String INPUT_DIR = "input//access//";

    /**
     * Directory the job writes its results to. The same value is used for the pre-run cleanup
     * and for the job's output path, so the two can never drift apart.
     */
    private static final String OUTPUT_DIR = "output//access//";

    /** Number of reduce tasks requested for the job. */
    // NOTE(review): presumably matches the number of partitions AccessPartitioner produces — confirm.
    private static final int NUM_REDUCE_TASKS = 3;

    /**
     * Configures the job, removes any previous output directory, runs the job and prints
     * whether it succeeded. Terminates the JVM with exit status 1 when the job fails.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the old output directory cannot be deleted or job setup/submission fails
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException propagated from {@code Job.waitForCompletion}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Windows adaptation: besides this property you also have to download hadoop-2.6.0,
        // configure the environment variables and replace 2 files in windows/system32
        // (presumably winutils.exe and hadoop.dll — confirm). Quite a few steps.
        // Only apply the machine-specific default when nothing was supplied explicitly
        // (e.g. via -Dhadoop.home.dir=...), so the hard-coded path does not clobber it.
        if (System.getProperty(HADOOP_HOME_PROPERTY) == null) {
            System.setProperty(HADOOP_HOME_PROPERTY, DEFAULT_HADOOP_HOME);
        }

        // Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Set the driver class.
        job.setJarByClass(AccessApp.class);

        // Set the custom mapper and reducer.
        job.setMapperClass(AccessMapper.class);
        job.setReducerClass(AccessReducer.class);

        // Set the map-side aggregation (combiner); the reducer class is reused for it.
        job.setCombinerClass(AccessReducer.class);

        // Set the mapper's output key/value types and the reducer's output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Access.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Access.class);

        // Set the custom partitioning rule.
        job.setPartitionerClass(AccessPartitioner.class);
        // Set the number of reducers.
        job.setNumReduceTasks(NUM_REDUCE_TASKS);

        // Delete the OUTPUT directory up front so the run does not fail because it already exists.
        // This goes through java.io.File, i.e. the local filesystem.
        // NOTE(review): assumes the job's default filesystem is local — confirm before running on HDFS.
        FileUtils.deleteDirectory(new File(OUTPUT_DIR));

        // Set the input and output directories.
        FileInputFormat.setInputPaths(job, new Path(INPUT_DIR));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_DIR));

        // Run the job and wait for it to finish.
        boolean succeeded = job.waitForCompletion(true);
        System.out.println(succeeded);

        // Surface a failed job through the process exit status so scripts/schedulers can detect it.
        if (!succeeded) {
            System.exit(1);
        }
    }
}
